###################################################################
### Oil Theft and Violence in Mexico                           ####
### Figures and tables in the main text                        ####                  
### This version: May 17, 2023                                 ####                   
###################################################################

#---------------------------------------------------#
# Preliminaries                                     #
#---------------------------------------------------#

rm(list = ls())

#List of packages for session
.packages = c("tidyverse", "lfe", "texreg",
              "ggplot2")

#Install CRAN packages (if not already installed)
.inst = .packages %in% installed.packages()
if(length(.packages[!.inst]) > 0) install.packages(.packages[!.inst])

#Loading packages into session 
lapply(.packages, require, character.only=TRUE)

# Set Working Directory to wherever files are downloaded
#setwd("")

#---------------------------------------------------#
# Loading Data                                      #
#---------------------------------------------------#

#Main datasets for regression analysis and figures

##Master dataset at the locality level
master_locality <- read.csv("master_locality.csv") %>%
  dplyr::select(-1)

##Master dataset at the municipal level
master_municipality <- read.csv("master_municipality.csv") %>%
  dplyr::select(-1)

###############################################
#########MUNICIPALITY-LEVEL ANALYSIS ##########                     
###############################################

#The first step is to subset to the different comparison groups:

#Note: this is required for the models in the Supporting Information as well
  
#(a) Pipeline versus Adjacent with No Pipeline
#(b) Adjacent with No Pipeline versus Non-Adjacent

#Adjacent No Pipeline vs Pipeline

pipeline_vs_adjnopipe = master_municipality %>%
  filter(fuel_pipeline == 1 | adjacent_nopipeline == 1) %>%
  dplyr::select(mun_id, year_month, year, young_male_murder_rate,
                oil_price_usd, fuel_pipeline, adjacent_nopipeline, 
                extortion_rate, kidnapping_rate,
                young_female_murder_rate, young_male_diabetes_rate, labs)

#Adjacent No Pipeline vs Non Adjacent

pipeline_vs_nonadj = master_municipality %>%
  filter(adjacent_nopipeline == 1 | notadjacent == 1) %>%
  dplyr::select(mun_id, year_month, year, young_male_murder_rate,
                oil_price_usd, adjacent_nopipeline, notadjacent, 
                extortion_rate, kidnapping_rate,
                young_female_murder_rate, young_male_diabetes_rate, labs)

#Pipeline vs Violent no pipeline

pipeline_vs_violent_nopipeline = master_municipality %>%
  filter(adjacent_nopipeline == 0) %>%
  filter(fuel_pipeline == 1 | violent_nopipe == 1) %>%
  dplyr::select(mun_id, year_month, year, young_male_murder_rate,
                oil_price_usd, fuel_pipeline, violent_nopipe, 
                extortion_rate, kidnapping_rate,
                young_female_murder_rate, young_male_diabetes_rate, labs)

#Pipeline vs no adjacent no pipeline

pipeline_vs_noadj_nopipe <- master_municipality %>%
  filter(adjacent_nopipeline == 0) %>%
  filter(fuel_pipeline == 1 | notadjacent == 1) %>%
  dplyr::select(mun_id, year_month, year, young_male_murder_rate,
                oil_price_usd, fuel_pipeline, notadjacent, 
                extortion_rate, kidnapping_rate,
                young_female_murder_rate, young_male_diabetes_rate, labs)

###############################################
############ Main Text -- Figure 3 ############                     
###############################################

#Figure 3.  Main Results: Coefficients for the interaction between prices and gasoline pipeline presence. 
#Mod 1 compares municipalities with pipeline vs. municipalities without pipelines; 
#Mod 2 compares municipalities with pipelines vs. their neighbors; 
#Mod 3 compares neighbor municipalities with municipalities without pipelines; 
#Mod 4 compares municipalities with pipelines with municipalities without pipelines with high levels of violence.

##Table

modmun1_main <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                     data = master_municipality)

modmun2_main <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd |  mun_id + year_month | 0 | mun_id, 
                     data = pipeline_vs_adjnopipe)

modmun3_main <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd |  mun_id + year_month | 0 | mun_id, 
                     data = pipeline_vs_noadj_nopipe)

modmun4_main <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd |  mun_id + year_month | 0 | mun_id, 
                     data = pipeline_vs_violent_nopipeline)

texreg::texreg(list(modmun1_main, modmun2_main, modmun3_main, modmun4_main), digits = 6, include.ci = TRUE)

##Coefficient Plot

coefplot::multiplot(modmun1_main, modmun2_main, modmun3_main, modmun4_main, horizontal = F,
                    coefficients = c("fuel_pipeline:oil_price_usd"),
                    newNames = c("fuel_pipeline:oil_price_usd" = "Fuel Pipeline * Price"),
                    names = c("Mod1", "Mod2", "Mod3", "Mod4"),
                    zeroColor = "black",  pointSize = 2.5, lwdInner = 0.6) +
  scale_color_brewer(palette = "Dark2") + theme_light()

###############################################
############ Main Text -- Figure 4 ############                     
###############################################

#Figure 4. Heterogeneous Effects: Population.

##Table

modmun_hetpop_a <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, total_pop > 0 & total_pop <= 5000))
modmun_hetpop_b <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, total_pop > 5000 & total_pop <= 10000))
modmun_hetpop_c <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, total_pop > 10000 & total_pop <= 20000))
modmun_hetpop_d <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, total_pop > 20000 & total_pop <= 50000))
modmun_hetpop_e <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, total_pop > 50000 & total_pop <= 100000))
modmun_hetpop_f <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, total_pop > 100000))

texreg::texreg(list(modmun_hetpop_a, modmun_hetpop_b, modmun_hetpop_c,
                    modmun_hetpop_d, modmun_hetpop_e, modmun_hetpop_f), digits = 6, include.ci = TRUE)

##Coefficient Plot

coefplot::multiplot(modmun_hetpop_a, modmun_hetpop_b, modmun_hetpop_c,
                    modmun_hetpop_d, modmun_hetpop_e, modmun_hetpop_f, horizontal = F,
                    coefficients = c("fuel_pipeline:oil_price_usd"),
                    newNames = c("fuel_pipeline:oil_price_usd" = "Fuel Pipeline \n * Price"),
                    names = c("Mod1: <5K", "Mod2: 5K-10K", "Mod3: 10K-20K", 
                              "Mod4: 20K-50K", "Mod5: 50K-100K", "Mod6: >100K"),
                    zeroColor = "black",  pointSize = 2.3, lwdInner = 0.6) +
  scale_color_brewer(palette = "Spectral") + theme_light()

###############################################
############ Main Text -- Figure 5 ############                     
###############################################

#Figure 5. Heterogeneous Effects: Income

##Table

modmun_hetinc_a <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, IM_2010 <= -0.6179))
modmun_hetinc_b <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, IM_2010 > -0.6179 & IM_2010 <= -0.2380))
modmun_hetinc_c <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, IM_2010 < -0.2380 & IM_2010 <= 0.2257))
modmun_hetinc_d <- felm(young_male_murder_rate ~ fuel_pipeline*oil_price_usd | mun_id + year_month | 0 | mun_id, 
                        data = subset(master_municipality, IM_2010 > 0.2257))

texreg::texreg(list(modmun_hetinc_a, modmun_hetinc_b, 
                    modmun_hetinc_c, modmun_hetinc_d), digits = 6, include.ci = TRUE)

##Coefficient Plot

coefplot::multiplot(modmun_hetinc_a, modmun_hetinc_b, modmun_hetinc_c,
                    modmun_hetinc_d, horizontal = F,
                    coefficients = c("fuel_pipeline:oil_price_usd"),
                    newNames = c("fuel_pipeline:oil_price_usd" = "Fuel Pipeline \n * Global Price"),
                    names = c("Mod1: Low Income", "Mod2: Medium Low Income", 
                              "Mod3: Medium High Income", "Mod4: High Income"),
                    zeroColor = "black",  pointSize = 2.3, lwdInner = 0.6) +
  scale_color_brewer(palette = "Spectral") + theme_light()

###############################################
############ LOCALITY-LEVEL ANALYSIS ##########                     
###############################################

#As an intermediate step, it's necessary to identify what municipalities have a pipeline, which ones are neighbors of those with pipelines, and which ones are non-neighbors without pipelines

adjacency_pipelines_mun <- master_municipality %>%
  dplyr::select(mun_id, year_month, fuel_pipeline, adjacent_nopipeline, notadjacent) %>%
  group_by(mun_id) %>%
  summarise(adjacent_nopipeline = mean(adjacent_nopipeline, na.rm = T),
            notadjacent = mean(notadjacent, na.rm = T),
            fuel_pipeline = mean(fuel_pipeline, na.rm = T)) %>%
  mutate(mun_id = str_pad(mun_id, 5, pad = "0"))

#Merge with locality level data

master_locality2 <- master_locality %>%
  mutate(mun_id = str_pad(mun_id, 3, pad = "0"),
         state = str_pad(state, 2, pad = "0"),
         mun_id = paste(state, mun_id, sep = "")) %>%
  left_join(adjacency_pipelines_mun, by = "mun_id")

#Filtering subsamples

##Localities in municipalities adjacent to one with pipeline (but without pipeline themselves)

adjnopipe_loc = master_locality2 %>%
  filter(adjacent_nopipeline == 1) %>%
  dplyr::select(loc_unique_id, mun_id, year_month, above_mun_mean,
                oil_price_usd, adjacent_nopipeline,
                close5km, close10km, close15km, close20km, close25km, close30km)

#Localities in municipalities adjacent to one with a pipeline (but without pipeline themselves) and not adjacent

pipeline_neighbor_vs_nonadj_loc = master_locality2 %>%
  filter(adjacent_nopipeline == 1 | notadjacent == 1) %>%
  dplyr::select(loc_unique_id, mun_id, year_month, above_mun_mean,
                oil_price_usd, adjacent_nopipeline, notadjacent,
                close5km, close10km, close15km, close20km, close25km, close30km)

###############################################
############ Main Text -- Figure 6 ############                     
###############################################

#Figure 6. Spatial Spillovers of Oil-Related Violence at the Locality Level: All Municipalities.
#Coefficients for the interaction between prices and distance to pipeline for localities within different distances of a pipeline.The dependent variable is the homicide rate for men from 19 to 39 years old

##Table

#Pipelines v. Rest

mod1_fuelviolence5 <- felm(above_mun_mean ~ close5km*oil_price_usd | loc_unique_id + year_month |
                             0 | loc_unique_id, data = master_locality2)
mod1_fuelviolence10 <- felm(above_mun_mean ~ close10km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = master_locality2)
mod1_fuelviolence15 <- felm(above_mun_mean ~ close15km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = master_locality2)
mod1_fuelviolence20 <- felm(above_mun_mean ~ close20km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = master_locality2)
mod1_fuelviolence25 <- felm(above_mun_mean ~ close25km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = master_locality2)
mod1_fuelviolence30 <- felm(above_mun_mean ~ close30km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = master_locality2)

texreg::texreg(list(mod1_fuelviolence5, mod1_fuelviolence10,
                    mod1_fuelviolence15, mod1_fuelviolence20,
                    mod1_fuelviolence25, mod1_fuelviolence30), digits = 6, include.ci = TRUE)

##Coefficient Plot

coefplot::multiplot(mod1_fuelviolence5, mod1_fuelviolence10, 
                    mod1_fuelviolence15, mod1_fuelviolence20, 
                    mod1_fuelviolence25, mod1_fuelviolence30, horizontal = F,
                    coefficients = c("close5km:oil_price_usd",
                                     "close10km:oil_price_usd",
                                     "close15km:oil_price_usd",
                                     "close20km:oil_price_usd",
                                     "close25km:oil_price_usd",
                                     "close30km:oil_price_usd"),
                    newNames = c("close5km:oil_price_usd" = "Close 5KM * Oil Price",
                                 "close10km:oil_price_usd" = "Close 10KM * Oil Price",
                                 "close15km:oil_price_usd" = "Close 15KM * Oil Price",
                                 "close20km:oil_price_usd" = "Close 20KM * Oil Price",
                                 "close25km:oil_price_usd" = "Close 25KM * Oil Price",
                                 "close30km:oil_price_usd" = "Close 30KM * Oil Price"),
                    names = c("Mod1", "Mod2", "Mod3", "Mod4", "Mod5", "Mod6"),
                    zeroColor = "black",  pointSize = 2.3, lwdInner = 0.6) +
  scale_color_brewer(palette = "Dark2") + theme_light()

###############################################
############ Main Text -- Figure 7 ############                     
###############################################

#Figure 7. Spatial Spillovers of Oil-Related Violence at the Locality Level: Municipalities with Pipelines and Their Neighbors.
#Coefficients for the interaction between prices and distance to pipeline for localities within different distances of a pipeline in neighboring municipalities.The dependent variable is the homicide rate for men from 19 to 39 years old

##Table

#Pipelines v. Adjacent

mod2_fuelviolence5 <- felm(above_mun_mean ~ close5km*oil_price_usd | loc_unique_id + year_month |
                             0 | loc_unique_id, data = adjnopipe_loc)
mod2_fuelviolence10 <- felm(above_mun_mean ~ close10km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = adjnopipe_loc)
mod2_fuelviolence15 <- felm(above_mun_mean ~ close15km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = adjnopipe_loc)
mod2_fuelviolence20 <- felm(above_mun_mean ~ close20km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = adjnopipe_loc)
mod2_fuelviolence25 <- felm(above_mun_mean ~ close25km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = adjnopipe_loc)
mod2_fuelviolence30 <- felm(above_mun_mean ~ close30km*oil_price_usd | loc_unique_id + year_month |
                              0 | loc_unique_id, data = adjnopipe_loc)

texreg::texreg(list(mod2_fuelviolence5, mod2_fuelviolence10,
                    mod2_fuelviolence15, mod2_fuelviolence20,
                    mod2_fuelviolence25, mod2_fuelviolence30), digits = 6, include.ci = TRUE)

##Coefficient Plot

coefplot::multiplot(mod2_fuelviolence5, mod2_fuelviolence10, 
                    mod2_fuelviolence15, mod2_fuelviolence20, 
                    mod2_fuelviolence25, mod2_fuelviolence30, horizontal = F,
                    coefficients = c("close5km:oil_price_usd",
                                     "close10km:oil_price_usd",
                                     "close15km:oil_price_usd",
                                     "close20km:oil_price_usd",
                                     "close25km:oil_price_usd",
                                     "close30km:oil_price_usd"),
                    newNames = c("close5km:oil_price_usd" = "Close 5KM * Oil Price",
                                 "close10km:oil_price_usd" = "Close 10KM * Oil Price",
                                 "close15km:oil_price_usd" = "Close 15KM * Oil Price",
                                 "close20km:oil_price_usd" = "Close 20KM * Oil Price",
                                 "close25km:oil_price_usd" = "Close 25KM * Oil Price",
                                 "close30km:oil_price_usd" = "Close 30KM * Oil Price"),
                    names = c("Mod1", "Mod2", "Mod3", "Mod4", "Mod5", "Mod6"),
                    zeroColor = "black",  pointSize = 2.3, lwdInner = 0.6) +
  scale_color_brewer(palette = "Dark2") + theme_light()

###############################################
############ Main Text -- Figure 8 ############                     
###############################################

#Figure 8. Effect on Number of Criminal Organizations
#Coefficients from the interaction of pipeline presence and oil price (Mod 1), and for pipeline presence (Mod 2). The dependent variable is the number of criminal organizations in a given municipality.

##Table

master_municipality_yearly <- master_municipality %>%
  filter(!is.na(crimgroups)) %>%
  group_by(mun_id, year, fuel_pipeline) %>%
  summarise(avg_pop = mean(Population, na.rm = T),
            avg_hom = mean(mortality_male_homicide_young, na.rm = T),
            avg_crime = mean(crimgroups, na.rm = T),
            oil_price_usd = mean(oil_price_usd, na.rm = T)) %>%
  mutate(avg_hom_rate = avg_hom / avg_pop * 100000,
         high_presence = ifelse(avg_crime == 2, 1, 0))

modmun_crimcomp1 <- felm(high_presence ~ fuel_pipeline, data = master_municipality_yearly)
modmun_crimcomp2 <- felm(high_presence ~ fuel_pipeline*oil_price_usd  | mun_id + year | 0 | mun_id, data = master_municipality_yearly)

texreg::texreg(list(modmun_crimcomp1, modmun_crimcomp2), digits = 6, include.ci = TRUE)

##Coefficient Plot

coefplot::multiplot(modmun_crimcomp1, modmun_crimcomp2, horizontal = F,
                    coefficients = c("fuel_pipeline",
                                     "fuel_pipeline:oil_price_usd"),
                    newNames = c("fuel_pipeline:oil_price_usd" = "Fuel Pipeline \n * Global Price",
                                 "fuel_pipeline" = "Fuel Pipeline"),
                    names = c("Mod1", "Mod2"),
                    zeroColor = "black",  pointSize = 2.3, lwdInner = 0.6) +
  scale_color_brewer(palette = "Set1") + theme_light()

###############################################
############ Main Text -- Table 1 #############                     
###############################################

modmun_crimcomp3 <- felm(avg_hom ~ high_presence  | mun_id + year | 0 | mun_id, data = master_municipality_yearly)
modmun_crimcomp4 <- felm(avg_hom ~ high_presence| 0 | 0 | mun_id, data = master_municipality_yearly)

texreg::texreg(list(modmun_crimcomp3, modmun_crimcomp4), digits = 6, include.ci = TRUE)